library(STRINGdb)
## Loading required package: png
## Loading required package: sqldf
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
## Loading required package: DBI
## Loading required package: plyr
## Loading required package: igraph
## 
## Attaching package: 'igraph'
## 
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## 
## The following object is masked from 'package:base':
## 
##     union
## 
## Loading required package: RCurl
## Loading required package: bitops
## Loading required package: plotrix
## Loading required package: RColorBrewer
## Loading required package: gplots
## 
## Attaching package: 'gplots'
## 
## The following object is masked from 'package:plotrix':
## 
##     plotCI
## 
## The following object is masked from 'package:stats':
## 
##     lowess
## 
## Loading required package: hash
## hash-2.2.6 provided by Decision Patterns

Liste des espèces

# Preview the species table of STRING version 10 (no species name given).
head(get_STRING_species(version = "10", species_name = NULL))
##   species_id                             official_name
## 1        394               Sinorhizobium fredii NGR234
## 2        882 Desulfovibrio vulgaris str. Hildenborough
## 3        883    Desulfovibrio vulgaris str. Miyazaki F
## 4       1140          Synechococcus elongatus PCC 7942
## 5       1148                Synechocystis sp. PCC 6803
## 6       2110                     Mycoplasma agalactiae
##                           compact_name  kingdom      type
## 1          Sinorhizobium fredii NGR234 bacteria periphery
## 2 Desulfovibrio vulgaris Hildenborough bacteria periphery
## 3      Desulfovibrio vulgaris Miyazaki bacteria periphery
## 4      Synechococcus elongatus PCC7942 bacteria periphery
## 5            Synechocystis sp. PCC6803 bacteria      core
## 6                Mycoplasma agalactiae bacteria periphery

Instanciation de la reference class STRINGdb à partir de l’identifiant taxonomique (9606 pour l’humain). Le score_threshold par défaut est 400 (d’après la doc) ; ici il est fixé à 0. input_directory permet de spécifier où seront stockés les fichiers téléchargés.

# Make sure the local cache directory exists before STRINGdb uses it.
# NOTE(review): STRINGdb presumably fails to download its files when
# input_directory is missing -- confirm against the installed version.
# The call is a no-op (warning silenced) when the directory already exists.
dir.create("repo_data", showWarnings = FALSE)

# STRINGdb reference-class instance for human (taxonomy id 9606) on STRING
# version 10. score_threshold is set to 0 rather than the documented default
# of 400; downloaded files are stored in "repo_data".
sdb <- STRINGdb$new(
  version = "10",
  species = 9606,
  score_threshold = 0,
  input_directory = "repo_data"
)

Téléchargement des interactions

# Load the interaction network (an igraph object) and show its summary.
g <- sdb$get_graph()
g
## IGRAPH UN-- 19247 4274001 -- 
## + attr: name (v/c), neighborhood (e/n), neighborhood_transferred
## | (e/n), fusion (e/n), cooccurence (e/n), homology (e/n),
## | coexpression (e/n), coexpression_transferred (e/n), experiments
## | (e/n), experiments_transferred (e/n), database (e/n),
## | database_transferred (e/n), textmining (e/n),
## | textmining_transferred (e/n), combined_score (e/n)
## + edges (vertex names):
## [1] 9606.ENSP00000003084--9606.ENSP00000301645
## [2] 9606.ENSP00000003084--9606.ENSP00000301653
## [3] 9606.ENSP00000003084--9606.ENSP00000301732
## + ... omitted several edges
# Peek at the raw interaction file cached in "repo_data".
# Only the first six data rows are parsed (nrows = 6): the network holds
# millions of links, so reading the whole file just to display its head
# wastes time and memory. The printed result is the same six rows.
head(
  read.table(
    "repo_data/9606__protein_links.tsv.gz",
    header = TRUE,
    nrows = 6
  )
)
##               protein1             protein2 neighborhood
## 1 9606.ENSP00000003084 9606.ENSP00000301645            0
## 2 9606.ENSP00000003084 9606.ENSP00000301653            0
## 3 9606.ENSP00000003084 9606.ENSP00000301732            0
## 4 9606.ENSP00000003084 9606.ENSP00000301891            0
## 5 9606.ENSP00000003084 9606.ENSP00000301905            0
## 6 9606.ENSP00000003084 9606.ENSP00000301956            0
##   neighborhood_transferred fusion cooccurence homology coexpression
## 1                        0      0           0        0            0
## 2                        0      0           0        0            0
## 3                        0      0           0      609            0
## 4                        0      0           0        0            0
## 5                        0      0           0        0            0
## 6                        0      0           0        0            0
##   coexpression_transferred experiments experiments_transferred database
## 1                        0           0                       0        0
## 2                        0           0                       0        0
## 3                        0           0                     101        0
## 4                        0           0                       0        0
## 5                        0           0                      75        0
## 6                        0           0                       0        0
##   database_transferred textmining textmining_transferred combined_score
## 1                    0        163                    129            239
## 2                    0        241                      0            240
## 3                    0        413                    287            292
## 4                    0         71                    246            269
## 5                    0         92                    161            233
## 6                    0         56                    155            168

Trop facile !

Liste des méthodes disponibles

# List the method names available on the STRINGdb reference class.
STRINGdb$methods()
##  [1] "add_diff_exp_color"                 
##  [2] "add_proteins_description"           
##  [3] "benchmark_ppi"                      
##  [4] "benchmark_ppi_pathway_view"         
##  [5] "callSuper"                          
##  [6] "copy"                               
##  [7] "enrichment_heatmap"                 
##  [8] "export"                             
##  [9] "field"                              
## [10] "get_aliases"                        
## [11] "get_annotations"                    
## [12] "get_annotations_desc"               
## [13] "get_bioc_graph"                     
## [14] "get_clusters"                       
## [15] "get_enrichment"                     
## [16] "get_graph"                          
## [17] "get_homologs"                       
## [18] "get_homologs_besthits"              
## [19] "get_homology_graph"                 
## [20] "get_interactions"                   
## [21] "get_link"                           
## [22] "get_neighbors"                      
## [23] "get_pathways_benchmarking_blackList"
## [24] "get_png"                            
## [25] "get_ppi_enrichment"                 
## [26] "get_ppi_enrichment_full"            
## [27] "get_proteins"                       
## [28] "get_pubmed"                         
## [29] "get_pubmed_interaction"             
## [30] "get_subnetwork"                     
## [31] "get_summary"                        
## [32] "get_term_proteins"                  
## [33] "getClass"                           
## [34] "getRefClass"                        
## [35] "import"                             
## [36] "initFields"                         
## [37] "initialize"                         
## [38] "load"                               
## [39] "load_all"                           
## [40] "map"                                
## [41] "mp"                                 
## [42] "plot_network"                       
## [43] "plot_ppi_enrichment"                
## [44] "post_payload"                       
## [45] "remove_homologous_interactions"     
## [46] "set_background"                     
## [47] "show"                               
## [48] "show#envRefClass"                   
## [49] "trace"                              
## [50] "untrace"                            
## [51] "usingMethods"

Les protéines et leurs annotations

# Protein table (external id, preferred name, size, annotation); show the
# first rows only.
sp <- sdb$get_proteins()
head(sp)
##    protein_external_id  preferred_name protein_size
## 1 9606.ENSP00000376213        KIAA1683         1367
## 2 9606.ENSP00000464271 ENSG00000265690          145
## 3 9606.ENSP00000472357        FLJ00297          141
## 4 9606.ENSP00000007390            TSR3          312
## 5 9606.ENSP00000000233            ARF5          180
## 6 9606.ENSP00000259467            PDCL          301
##                                                                                                                                                                                                                                                            annotation
## 1                                                                                                                                                                                                                                                            KIAA1683
## 2                                                                                                                                                                                                                                            annotation not available
## 3                                                                                                                                                                                                                                                    FLJ00297 protein
## 4                                                                                                                          TSR3, 20S rRNA accumulation, homolog (S. cerevisiae); Probable pre-rRNA processing protein involved in ribosome biogenesis (By similarity)
## 5 ADP-ribosylation factor 5; GTP-binding protein that functions as an allosteric activator of the cholera toxin catalytic subunit, an ADP- ribosyltransferase. Involved in protein trafficking; may modulate vesicle budding and uncoating within the Golgi apparatus
## 6                                                                                                                                                                                                                                                      phosducin-like

map identifiers

# Map the name "p53" to its STRING identifier and display it.
p53 <- sdb$mp("p53")
p53
## [1] "9606.ENSP00000269305"

Homologues chez la souris

# Homologs of p53 in the species with taxonomy id 10090 (mouse).
sdb$get_homologs(p53, 10090)
##            STRING_id_a              STRING_id_b bitscore start_a end_a
## 1 9606.ENSP00000269305 10090.ENSMUSP00000101269      270      97   352
## 2 9606.ENSP00000269305 10090.ENSMUSP00000104298      578       1   393
## 3 9606.ENSP00000269305 10090.ENSMUSP00000110965      269      94   350
##   start_b end_b size_b
## 1     114   378    638
## 2       4   390    390
## 3     162   423    680

Les voisins

# First few STRING identifiers of the proteins neighboring p53.
head(sdb$get_neighbors(p53))
## [1] "9606.ENSP00000003084" "9606.ENSP00000003100" "9606.ENSP00000003302"
## [4] "9606.ENSP00000004921" "9606.ENSP00000005257" "9606.ENSP00000005260"

Les interactions

# Interactions within the given set: p53 and one of its neighbors.
sdb$get_interactions(c(p53, "9606.ENSP00000003084"))
##                   from                   to neighborhood
## 1 9606.ENSP00000003084 9606.ENSP00000269305            0
##   neighborhood_transferred fusion cooccurence homology coexpression
## 1                        0      0           0        0            0
##   coexpression_transferred experiments experiments_transferred database
## 1                        0           0                       0        0
##   database_transferred textmining textmining_transferred combined_score
## 1                    0        392                      0            392

PubMed

# First PubMed identifiers returned for the pair p53 / ENSP00000003084.
head(sdb$get_pubmed_interaction(p53, "9606.ENSP00000003084"))
## [1] "PMID:9895335" "PMID:9860803" "PMID:9847078" "PMID:9821189"
## [5] "PMID:9819431" "PMID:9811682"

Autres méthodes

# Retrieve the GO, InterPro and KEGG terms associated with each vertex
# (protein) and show the first rows.
head(sdb$get_annotations())
##              STRING_id    term_id category type
## 1 9606.ENSP00000372313 GO:0004674 Function     
## 2 9606.ENSP00000372313 GO:0004672 Function     
## 3 9606.ENSP00000372313 GO:0016773 Function     
## 4 9606.ENSP00000372313 GO:0016772 Function     
## 5 9606.ENSP00000372313 GO:0016740 Function     
## 6 9606.ENSP00000372313 GO:0003824 Function
#
# sdb$get_bioc_graph() # left disabled: too memory-hungry
#
# First PubMed identifiers returned for p53.
head(sdb$get_pubmed(p53))
## [1] "PMID:9990839" "PMID:9990486" "PMID:9990480" "PMID:9990085"
## [5] "PMID:9990040" "PMID:9989991"

Test avec le peptidoglycane d’E. coli

# Look up the STRING v10 species entries matching "Escherichia coli".
get_STRING_species(version = "10", species_name = "Escherichia coli")
##      species_id                             official_name
## 236      155864      Escherichia coli O157:H7 str. EDL933
## 295      199310                   Escherichia coli CFT073
## 632      316385  Escherichia coli str. K-12 substr. DH10B
## 633      316407  Escherichia coli str. K-12 substr. W3110
## 753      362663                      Escherichia coli 536
## 1060     469008                Escherichia coli BL21(DE3)
## 1102     481805                Escherichia coli ATCC 8739
## 1155     511145 Escherichia coli str. K-12 substr. MG1655
##                     compact_name  kingdom      type
## 236      Escherichia coli O157H7 bacteria periphery
## 295      Escherichia coli CFT073 bacteria periphery
## 632   Escherichia coli K12 DH10B bacteria periphery
## 633   Escherichia coli K12 W3110 bacteria periphery
## 753         Escherichia coli 536 bacteria periphery
## 1060    Escherichia coli BL21DE3 bacteria periphery
## 1102   Escherichia coli ATCC8739 bacteria periphery
## 1155 Escherichia coli K12 MG1655 bacteria      core
# Switch to E. coli K-12 MG1655 (taxonomy id 511145), keeping a score
# threshold of 350 and the same local cache directory.
sdb <- STRINGdb$new(
  version = "10",
  species = 511145,
  score_threshold = 350,
  input_directory = "repo_data"
)

# Gene names of the core set, mapped to STRING identifiers.
core_ids <- c(
  "MURA", "MURB", "MURC", "MURD",
  "MURE", "MURF", "MRAY", "MURG"
)
core <- sdb$mp(core_ids)

# Interactions among the core proteins; preview the first rows.
links_core <- sdb$get_interactions(core)
head(links_core)
##           from           to neighborhood neighborhood_transferred fusion
## 1 511145.b0085 511145.b0086          859                      513    899
## 2 511145.b0085 511145.b0087          859                      635      0
## 3 511145.b0085 511145.b0088          859                      513      0
## 4 511145.b0085 511145.b0090          859                      513      0
## 5 511145.b0085 511145.b0091          681                      642      0
## 6 511145.b0085 511145.b3189            0                      120      0
##   cooccurence homology coexpression coexpression_transferred experiments
## 1         771        0          195                        0         310
## 2         751        0          188                        0           0
## 3         772        0          449                        0           0
## 4         734        0          101                       84         852
## 5         768        0            0                        0           0
## 6         752        0            0                        0           0
##   experiments_transferred database database_transferred textmining
## 1                       0      900                    0          0
## 2                       0        0                    0        869
## 3                       0      900                    0        931
## 4                      85        0                    0        870
## 5                       0        0                    0        410
## 6                       0        0                    0        829
##   textmining_transferred combined_score
## 1                    176            999
## 2                    710            999
## 3                    710            999
## 4                    709            999
## 5                    284            986
## 6                    696            987
# Draw the network of the core proteins.
sdb$plot_network(core)

# Larger gene set: map the names to STRING identifiers, collect the
# interactions among them and draw the resulting network.
biosynthesis_ids <- c(
  "MURA", "MURB", "MURC", "MURD", "MURE", "MURF", "MRAY", "MURG",
  "GLMU", "MRSA", "GLMS", "BACA", "YAES", "DDLA", "DDLB", "DADX",
  "MURI", "DAPF", "DAPE", "ARGD", "DAPD", "DAPB", "DAPA", "ASD",
  "LYSC"
)
biosynthesis <- sdb$mp(biosynthesis_ids)
links_biosynthesis <- sdb$get_interactions(biosynthesis)
sdb$plot_network(biosynthesis)

Test avec quelques gènes du pathway TRNA-CHARGING-PWY de BioCyc

# A few gene names from the tRNA charging pathway, mapped to STRING
# identifiers; then the interactions among them, first rows only.
tRNAcharging_ids <- c(
  "ALAS", "ARGS", "ASNS", "ASPS", "CYSS",
  "GLTX", "GLYQ", "GLYS", "HISS", "ILES"
)
tRNAcharging <- sdb$mp(tRNAcharging_ids)
links_tRNA <- sdb$get_interactions(tRNAcharging)
head(links_tRNA)
##           from           to neighborhood neighborhood_transferred fusion
## 1 511145.b0026 511145.b0526            0                        0      0
## 2 511145.b0026 511145.b0930            0                        0      0
## 3 511145.b0026 511145.b1866            0                        0      0
## 4 511145.b0026 511145.b1876            0                        0      0
## 5 511145.b0026 511145.b2400            0                        0      0
## 6 511145.b0026 511145.b2514            0                        0      0
##   cooccurence homology coexpression coexpression_transferred experiments
## 1           0        0            0                      329           0
## 2           0        0          225                      228           0
## 3         150        0            0                      403           0
## 4           0        0            0                      243           0
## 5         575        0            0                      233           0
## 6         403        0          105                      209           0
##   experiments_transferred database database_transferred textmining
## 1                       0        0                    0        346
## 2                     850        0                  463          0
## 3                       0        0                    0          0
## 4                     878        0                  463        637
## 5                     695        0                  416        658
## 6                       0        0                    0        656
##   textmining_transferred combined_score
## 1                    282            657
## 2                    151            951
## 3                     65            484
## 4                    631            992
## 5                    488            987
## 6                    683            945
# Draw the network of the mapped tRNA charging proteins.
sdb$plot_network(tRNAcharging)